{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)\n",
    "\n",
    "import sys\n",
    "import os\n",
    "import time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "sys.path.insert(0, os.path.realpath('..'))\n",
    "import folium\n",
    "import django\n",
    "import sqlite3\n",
    "django.setup()\n",
    "from django.db import connections\n",
    "from pretty import pprint\n",
    "from firecares.firestation.models import (FireDepartment, FireStation, NFIRSStatistic, FireDepartmentRiskModels,\n",
    "                                          PopulationClassQuartile)\n",
    "from fire_risk.models import DIST, DISTMediumHazard, DISTHighHazard\n",
    "from fire_risk.models.DIST.providers.ahs import ahs_building_areas\n",
    "from fire_risk.models.DIST.providers.iaff import response_time_distributions\n",
    "from django.db.models import Avg, Max, Min, Q\n",
    "from django.contrib.gis.geos import GEOSGeometry\n",
    "from IPython.display import display\n",
    "from firecares.utils import lenient_summation, dictfetchall\n",
    "from firecares.tasks.update import (calculate_department_census_geom, calculate_story_distribution,\n",
    "                                    calculate_structure_counts, update_performance_score, update_nfirs_counts,\n",
    "                                    dist_model_for_hazard_level)\n",
    "pd.set_option(\"display.max_rows\", 2000)\n",
    "pd.set_option(\"display.max_columns\", 100)\n",
    "\n",
    "def display_geom(geom):\n",
    "    _map = folium.Map(location=[geom.centroid.y, geom.centroid.x],\n",
    "                      tiles='Stamen Toner')\n",
    "    _map.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')\n",
    "    ll = geom.extent[1::-1]\n",
    "    ur = geom.extent[3:1:-1]\n",
    "    _map.fit_bounds([ll, ur])\n",
    "\n",
    "    return _map\n",
    "\n",
    "# Philadephia-specific\n",
    "fd = FireDepartment.objects.get(id=91907)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>risk_category</th>\n",
       "      <th>object_of_origin</th>\n",
       "      <th>room_of_origin</th>\n",
       "      <th>floor_of_origin</th>\n",
       "      <th>building_of_origin</th>\n",
       "      <th>beyond</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Low</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Medium</td>\n",
       "      <td>10</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>N/A</td>\n",
       "      <td>7266</td>\n",
       "      <td>2348</td>\n",
       "      <td>673</td>\n",
       "      <td>621</td>\n",
       "      <td>361</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  risk_category  object_of_origin  room_of_origin  floor_of_origin  \\\n",
       "0           Low                 1             NaN              NaN   \n",
       "1        Medium                10               5              NaN   \n",
       "2           N/A              7266            2348              673   \n",
       "\n",
       "   building_of_origin  beyond  \n",
       "0                 NaN     NaN  \n",
       "1                 NaN     NaN  \n",
       "2                 621     361  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q = \"\"\"SELECT *\n",
    "    FROM crosstab(\n",
    "      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)\n",
    "        FROM buildingfires a left join (\n",
    "          SELECT state,\n",
    "            fdid,\n",
    "            inc_date,\n",
    "            inc_no,\n",
    "            exp_no,\n",
    "            geom,\n",
    "            x.parcel_id,\n",
    "            x.risk_category\n",
    "          FROM (select * from incidentaddress a\n",
    "             left join parcel_risk_category_local b using (parcel_id)\n",
    "             ) AS x\n",
    "        ) AS y using (state, inc_date, exp_no, fdid, inc_no)\n",
    "    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')\n",
    "        and fire_sprd is not null and fire_sprd != ''''\n",
    "    group by risk_category, fire_sprd\n",
    "    order by risk_category, fire_sprd ASC')\n",
    "    AS ct(risk_category text, \"object_of_origin\" bigint, \"room_of_origin\" bigint, \"floor_of_origin\" bigint, \"building_of_origin\" bigint, \"beyond\" bigint);\"\"\"\n",
    "\n",
    "pd.read_sql_query(q, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error updating DIST score: Traceback (most recent call last):\n",
      "  File \"/firecares/firecares/tasks/update.py\", line 126, in update_performance_score\n",
      "    dist = dist_model(floor_extent=False, **counts)\n",
      "  File \"/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py\", line 89, in __init__\n",
      "    raise NotEnoughRecords\n",
      "NotEnoughRecords\n",
      ".\n",
      "Error updating DIST score: Traceback (most recent call last):\n",
      "  File \"/firecares/firecares/tasks/update.py\", line 126, in update_performance_score\n",
      "    dist = dist_model(floor_extent=False, **counts)\n",
      "  File \"/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py\", line 383, in __init__\n",
      "    beyond, **kwargs)\n",
      "  File \"/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py\", line 89, in __init__\n",
      "    raise NotEnoughRecords\n",
      "NotEnoughRecords\n",
      ".\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/webapps/firecares/local/lib/python2.7/site-packages/numpy/core/numeric.py:294: FutureWarning: in the future, full((10000, 4), -1000) will return an array of dtype('int64')\n",
      "  format(shape, fill_value, array(fill_value).dtype), FutureWarning)\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "updating fdid: 91907 - Unknown risk level from: 7.0 to 7.0.\n",
      "clearing High level from 91907 due to missing categories in aggregation\n",
      "updating fdid: 91907 - All risk level from: 7.0 to 7.0.\n"
     ]
    }
   ],
   "source": [
    "update_performance_score(fd.id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Calculate quartiles based on similar departments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>dist_model_score</th>\n",
       "      <th>level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>91907</td>\n",
       "      <td>Philadelphia Fire Department</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Medium hazard</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>91907</td>\n",
       "      <td>Philadelphia Fire Department</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Low hazard</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>91907</td>\n",
       "      <td>Philadelphia Fire Department</td>\n",
       "      <td>7</td>\n",
       "      <td>All hazard levels</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>91907</td>\n",
       "      <td>Philadelphia Fire Department</td>\n",
       "      <td>NaN</td>\n",
       "      <td>High hazard</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>91907</td>\n",
       "      <td>Philadelphia Fire Department</td>\n",
       "      <td>7</td>\n",
       "      <td>Unknown hazard</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id                          name  dist_model_score              level\n",
       "60  91907  Philadelphia Fire Department               NaN      Medium hazard\n",
       "61  91907  Philadelphia Fire Department               NaN         Low hazard\n",
       "62  91907  Philadelphia Fire Department                 7  All hazard levels\n",
       "63  91907  Philadelphia Fire Department               NaN        High hazard\n",
       "64  91907  Philadelphia Fire Department                 7     Unknown hazard"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q = \"\"\"SELECT\n",
    "            fd.\"id\",\n",
    "            fd.\"fdid\",\n",
    "            fd.\"name\",\n",
    "            fd.\"state\",\n",
    "            fd.\"region\",\n",
    "            (SELECT COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) AS \"risk_model_size1_percent_size2_percent_sum\",\n",
    "            (SELECT COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) AS \"risk_model_deaths_injuries_sum\",\n",
    "            rm.\"dist_model_score\",\n",
    "            rm.\"risk_model_deaths\",\n",
    "            rm.\"risk_model_injuries\",\n",
    "            rm.\"risk_model_fires\",\n",
    "            rm.\"risk_model_fires_size0\",\n",
    "            rm.\"risk_model_fires_size0_percentage\",\n",
    "            rm.\"risk_model_fires_size1\",\n",
    "            rm.\"risk_model_fires_size1_percentage\",\n",
    "            rm.\"risk_model_fires_size2\",\n",
    "            rm.\"risk_model_fires_size2_percentage\",\n",
    "            fd.\"population\",\n",
    "            fd.\"population_class\",\n",
    "            fd.\"featured\",\n",
    "            nfirs.avg_fires as \"residential_fires_avg_3_years\",\n",
    "            rm.\"level\",\n",
    "            CASE WHEN (rm.\"risk_model_fires_size1_percentage\" IS NOT NULL OR rm.\"risk_model_fires_size2_percentage\" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) ELSE NULL END AS \"risk_model_size1_percent_size2_percent_sum_quartile\",\n",
    "            CASE WHEN (rm.\"risk_model_deaths\" IS NOT NULL OR rm.\"risk_model_injuries\" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) ELSE NULL END AS \"risk_model_deaths_injuries_sum_quartile\",\n",
    "            CASE WHEN rm.\"dist_model_score\" IS NOT NULL THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score) ELSE NULL END AS \"dist_model_score_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_deaths\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_deaths is not null, fd.population_class, rm.level order by rm.risk_model_deaths) ELSE NULL END AS \"risk_model_deaths_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_injuries\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_injuries is not null, fd.population_class, rm.level order by rm.risk_model_injuries) ELSE NULL END AS \"risk_model_injuries_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_fires_size0\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size0 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size0) ELSE NULL END AS \"risk_model_fires_size0_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_fires_size1\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size1 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size1) ELSE NULL END AS \"risk_model_fires_size1_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_fires_size2\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size2 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size2) ELSE NULL END AS \"risk_model_fires_size2_quartile\",\n",
    "            CASE WHEN rm.\"risk_model_fires\" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires is not null, fd.population_class, rm.level order by rm.risk_model_fires) ELSE NULL END AS \"risk_model_fires_quartile\",\n",
    "            CASE WHEN \"nfirs\".\"avg_fires\" IS NOT NULL THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires) ELSE NULL END AS \"residential_fires_avg_3_years_quartile\"\n",
    "\n",
    "        FROM \"firestation_firedepartment\" fd\n",
    "        INNER JOIN \"firestation_firedepartmentriskmodels\" rm ON\n",
    "            rm.department_id = fd.id\n",
    "        LEFT JOIN (\n",
    "            SELECT fire_department_id, AVG(count) as avg_fires, level\n",
    "            from firestation_nfirsstatistic\n",
    "            WHERE year >= 2010 and metric='residential_structure_fires'\n",
    "            GROUP BY fire_department_id, level) nfirs\n",
    "        ON (fd.id=nfirs.fire_department_id and nfirs.level = rm.level)\n",
    "        WHERE fd.archived=False and fd.population_class = 9\n",
    "        ORDER BY fd.id\"\"\"\n",
    "\n",
    "df = pd.read_sql_query(q, connections['default'])\n",
    "\n",
    "levels = {'0': 'All hazard levels', '1': 'Low hazard', '2': 'Medium hazard', '4': 'High hazard', '5': 'Unknown hazard'}\n",
    "quartiles = {'1': 'Low risk', '2': 'Medium risk', '3': 'Medium risk', '4': 'High risk'}\n",
    "\n",
    "# Transformations to human-readable values\n",
    "df['level'] = df['level'].apply(lambda x: levels[str(x)])\n",
    "for c in ['risk_model_size1_percent_size2_percent_sum_quartile', 'risk_model_deaths_injuries_sum_quartile', 'dist_model_score_quartile', 'risk_model_deaths_quartile', 'risk_model_injuries_quartile', 'risk_model_fires_size0_quartile', 'risk_model_fires_size1_quartile', 'risk_model_fires_size2_quartile', 'risk_model_fires_quartile', 'residential_fires_avg_3_years_quartile']:\n",
    "    df[c] = df[c].apply(lambda x: quartiles[str(int(x))] if not np.isnan(x) else 'N/A')\n",
    "\n",
    "df[df['id'] == fd.id][['id', 'name', 'dist_model_score', 'level']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Full-cycle statistics import/calcuations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### At a minimum-we need to import the department and ALL related departments to get an accurate score\n",
    "\n",
    "- Clear out FireDepartmentRiskModels, NFIRSStatistic\n",
    "- Refresh `population_quartiles` materialized view (to ensure that we're starting w/ a clean slate)\n",
    "- Run `update_nfirs` django command to pull NFIRS stats into FireCARES per department\n",
    "    - Creates/updated NFIRSStatistic records\n",
    "    - Depends on NFIRS.buildingfires, FC FireDepartment\n",
    "- Run `calc-department-owned-tracts` django command to calculate the census-tract based department jurisdiction\n",
    "    - Updates `FireDepartment.owned_tract_geom` on FC FireDepartment\n",
    "    - Depends on NFIRS.census_block_groups_2010, NIST.tract_years, FC FireDepartment\n",
    "- Run `calc-structure-counts` django command to determine the structure counts over a department's census-tract-based jurisdiction\n",
    "    - Updates `FireDepartmentRiskModels.structure_count` on FC FireDepartmentRiskModels\n",
    "    - Depends on NFIRS.parcel_risk_category_local, FC FireDepartment.owned_tracts_geom\n",
    "- Run `calc-story-distribution` (not yet completed) to calculate the story distribution for a department's census-tract-based jurisdiction\n",
    "    - Updates `FireDepartmentRiskModels.floor_count_coefficients`\n",
    "    - Depends on NFIRS.LUSE_swg - copied from parcels.LUSE_swg, NFIRS.parcel_stories - subset of parcels.parcels, FC FireDepartment.owned_tracts_geom\n",
    "- Run `import-predictions` to import predictions from Stanley's prediction model\n",
    "    - Depends on predictions.csv file, FC FireDepartment\n",
    "    - Recalculates the DIST score for EACH hazard level for FC FireDepartment\n",
    "- Re-compute `population_quartiles` materialized view based on the imported information\n",
    "    - Depends on FC Firedepartment, FC FireDepartmentRiskModels, FC NFIRSStatistic\n",
    "- Run `load-dist-scores` to perform the DIST score calculation per department\n",
    "    - Depends on NFIRS.buildingfires, NFIRS.incidentaddress, NFIRS.parcel_risk_category_local - copy of parcels.parcels w/ subset of columns + parcel risk level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Clear out FireDepartmentRiskModels since all of this information is derived\n",
    "FireDepartmentRiskModels.objects.all().delete()\n",
    "NFIRSStatistic.objects.all().delete()\n",
    "\n",
    "with connections['default'].cursor() as c:\n",
    "    c.execute(\"refresh materialized view population_quartiles;\")\n",
    "\n",
    "assert PopulationClassQuartile.objects.count() == 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "similar_departments = list(FireDepartment.objects.filter(id__in=fd.similar_departments.values_list('id', flat=True))) + [fd]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for f in similar_departments:\n",
    "    update_nfirs_counts.delay(f.id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Similar departments to Houston..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(map(lambda x: (x.id, x.name, x.population, x.region), similar_departments), columns=['id', 'name', 'population', 'region'])\n",
    "df.sort('population', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.read_sql_query(\"select * from firestation_nfirsstatistic;\", connections['default'])\n",
    "department_count = len(df['fire_department_id'].unique())\n",
    "year_count = len(df['year'].unique())\n",
    "metric_type_count = len(df['metric'].unique())\n",
    "# Should have 5 risk levels (low, medium, high, unknown, all) for each metrics for every year for each department\n",
    "assert len(df) == department_count * 5 * metric_type_count * year_count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Now that we have NFIRS statistics (which is a dependency of the quartile calculations), we can continue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# NOTE: this takes some time...\n",
    "for f in similar_departments:\n",
    "    calculate_department_census_geom.delay(f.id)\n",
    "fd.refresh_from_db()\n",
    "display_geom(fd.owned_tracts_geom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# NOTE: this takes some time...\n",
    "from django.core.management import call_command\n",
    "\n",
    "dept_ids = map(lambda x: str(x.id), similar_departments)\n",
    "call_command('import-predictions', '../predictions.2015.csv', '--ids', *dept_ids)  # Also updates the performance score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "with connections['default'].cursor() as c:\n",
    "    c.execute('refresh materialized view population_quartiles;')\n",
    "    \n",
    "PopulationClassQuartile.objects.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# NOTE: this takes a LONG time... (like hours)\n",
    "for f in similar_departments:\n",
    "    calculate_structure_counts.delay(f.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for f in similar_departments:\n",
    "    update_performance_score.delay(f.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "q = \"\"\"SELECT *\n",
    "    FROM crosstab(\n",
    "      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)\n",
    "        FROM buildingfires a left join (\n",
    "          SELECT state,\n",
    "            fdid,\n",
    "            inc_date,\n",
    "            inc_no,\n",
    "            exp_no,\n",
    "            geom,\n",
    "            x.parcel_id,\n",
    "            x.risk_category\n",
    "          FROM (select * from incidentaddress a\n",
    "             left join parcel_risk_category_local b using (parcel_id)\n",
    "             ) AS x\n",
    "        ) AS y using (state, inc_date, exp_no, fdid, inc_no)\n",
    "    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')\n",
    "        and fire_sprd is not null and fire_sprd != ''''\n",
    "    group by risk_category, fire_sprd\n",
    "    order by risk_category, fire_sprd ASC')\n",
    "    AS ct(risk_category text, \"object_of_origin\" bigint, \"room_of_origin\" bigint, \"floor_of_origin\" bigint, \"building_of_origin\" bigint, \"beyond\" bigint);\"\"\"\n",
    "\n",
    "with connections['nfirs'].cursor() as c:\n",
    "    c.execute(q, {'state': fd.state, 'fdid': fd.fdid})\n",
    "    results = dictfetchall(c)\n",
    "\n",
    "all_counts = dict(object_of_origin=0,\n",
    "                  room_of_origin=0,\n",
    "                  floor_of_origin=0,\n",
    "                  building_of_origin=0,\n",
    "                  beyond=0)\n",
    "\n",
    "risk_mapping = {'Low': 1, 'Medium': 2, 'High': 4, 'N/A': 5}\n",
    "\n",
    "print 'ALL RESULTS'\n",
    "df = pd.DataFrame(results)\n",
    "display(df)\n",
    "\n",
    "for result in results:\n",
    "    print result.get('risk_category')\n",
    "    dist_model = dist_model_for_hazard_level(result.get('risk_category'))\n",
    "    \n",
    "    counts = dict(object_of_origin=result.get('object_of_origin', 0),\n",
    "                  room_of_origin=result.get('room_of_origin', 0),\n",
    "                  floor_of_origin=result.get('floor_of_origin', 0),\n",
    "                  building_of_origin=result.get('building_of_origin', 0),\n",
    "                  beyond=result.get('beyond', 0))\n",
    "    \n",
    "    display(counts)\n",
    "\n",
    "    # add current risk category to the all risk category\n",
    "    for key, value in counts.items():\n",
    "        all_counts[key] += value\n",
    "        \n",
    "    ahs_building_size = ahs_building_areas(fd.fdid, fd.state)\n",
    "\n",
    "    if ahs_building_size is not None:\n",
    "        counts['building_area_draw'] = ahs_building_size\n",
    "\n",
    "    response_times = response_time_distributions.get('{0}-{1}'.format(fd.fdid, fd.state))\n",
    "\n",
    "    if response_times:\n",
    "        counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)\n",
    "        \n",
    "    dist = dist_model(floor_extent=False, **counts)\n",
    "    print 'SCORE: {}'.format(dist.gibbs_sample())\n",
    "\n",
    "    \n",
    "dist_model = dist_model_for_hazard_level('All')\n",
    "\n",
    "if ahs_building_size is not None:\n",
    "    all_counts['building_area_draw'] = ahs_building_size\n",
    "    \n",
    "if response_times:\n",
    "    all_counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)\n",
    "    \n",
    "dist = dist_model(floor_extent=False, **all_counts)\n",
    "    \n",
    "display(all_counts)\n",
    "print 'SCORE: {}'.format(dist.gibbs_sample())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "update_performance_score.delay(fd.id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Score sanity checks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "! rm scores.db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Using sqlite3 since this might be a long-running process and would hate to have to restart\n",
    "conn = sqlite3.connect('scores.db')\n",
    "create = \"\"\"\n",
    "CREATE TABLE scores(\n",
    "    fc_dept_id INT NOT NULL,\n",
    "    name TEXT NOT NULL,\n",
    "    low_count REAL NOT NULL,\n",
    "    med_count REAL NOT NULL,\n",
    "    high_count REAL NOT NULL,\n",
    "    unk_count REAL NOT NULL,\n",
    "    low_percent REAL NOT NULL,\n",
    "    med_percent REAL NOT NULL,\n",
    "    high_percent REAL NOT NULL,\n",
    "    unk_percent REAL NOT NULL,\n",
    "    target REAL NOT NULL,\n",
    "    low_score REAL NOT NULL,\n",
    "    med_score REAL NOT NULL,\n",
    "    high_score REAL NOT NULL,\n",
    "    unk_score REAL NOT NULL,\n",
    "    all_score REAL NOT NULL,\n",
    "    all_score_diff REAL NOT NULL,\n",
    "    diff_percentage REAL NOT NULL);\n",
    "\"\"\"\n",
    "conn.cursor().execute(create)\n",
    "                      \n",
    "                   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "conn = sqlite3.connect('scores.db')\n",
    "qcounts = \"\"\"SELECT *\n",
    "    FROM crosstab(\n",
    "      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)\n",
    "        FROM buildingfires a left join (\n",
    "          SELECT state,\n",
    "            fdid,\n",
    "            inc_date,\n",
    "            inc_no,\n",
    "            exp_no,\n",
    "            geom,\n",
    "            x.parcel_id,\n",
    "            x.risk_category\n",
    "          FROM (select * from incidentaddress a\n",
    "             left join parcel_risk_category_local b using (parcel_id)\n",
    "             ) AS x\n",
    "        ) AS y using (state, inc_date, exp_no, fdid, inc_no)\n",
    "    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')\n",
    "        and fire_sprd is not null and fire_sprd != ''''\n",
    "    group by risk_category, fire_sprd\n",
    "    order by risk_category, fire_sprd ASC')\n",
    "    AS ct(risk_category text, \"object_of_origin\" bigint, \"room_of_origin\" bigint, \"floor_of_origin\" bigint, \"building_of_origin\" bigint, \"beyond\" bigint);\"\"\"\n",
    "\n",
    "ins = \"\"\"INSERT INTO scores VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);\"\"\"\n",
    "\n",
    "def already_in(fd_id):\n",
    "    return conn.cursor().execute('SELECT fc_dept_id FROM scores WHERE fc_dept_id = ?', (fd_id,)).fetchone() is not None\n",
    "\n",
    "for fd in FireDepartment.priority_departments.all():\n",
    "    if not already_in(fd.id):\n",
    "        counts = pd.read_sql_query(qcounts, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})\n",
    "        df2 = counts.set_index('risk_category')\n",
    "        counts = df2.sum(axis=1).to_dict()\n",
    "        chigh, clow, cmed, cna = counts.get('High', 0), counts.get('Low', 0), counts.get('Medium', 0), counts.get('N/A', 0)\n",
    "        tot = float(chigh + clow + cmed + cna)\n",
    "        if tot:\n",
    "            hper = chigh / tot\n",
    "            lper = clow / tot\n",
    "            mper = cmed / tot\n",
    "            nper = cna / tot\n",
    "            low = fd.metrics.dist_model_score.low or 0\n",
    "            medium = fd.metrics.dist_model_score.medium or 0\n",
    "            high = fd.metrics.dist_model_score.high or 0\n",
    "            unk = fd.metrics.dist_model_score.unknown or 0\n",
    "            _all = fd.metrics.dist_model_score.all or 0\n",
    "            target = hper * high + lper * low + mper * medium + nper * unk\n",
    "            off = _all - target\n",
    "            diff_per = off / _all if _all else 9e999\n",
    "\n",
    "            args = (fd.id, fd.name, clow, cmed, chigh, cna, lper, mper, hper, nper, target, low, medium, high, unk, _all, off, diff_per)\n",
    "            print 'Adding counts for {}'.format(fd.name)\n",
    "            conn.cursor().execute(ins, args) \n",
    "            conn.commit()\n",
    "        else:\n",
    "            print 'No counts for {}'.format(fd.name)\n",
    "    else:\n",
    "        print 'Skipping {}, already in...'.format(fd.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "conn = sqlite3.connect('scores.db')\n",
    "df = pd.read_sql_query(\"select * from scores;\", conn)\n",
    "df = df.sort('diff_percentage', ascending=False)\n",
    "display(df)\n",
    "df.to_csv('/tmp/scores.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
